// New parent distribution: mean and SD of parent human capital from the 2000 Census.
// This section loads the Census extract, applies the sample restrictions, attaches
// each household's percentile of summed wage income, and keeps the band of
// percentiles 23-27.
use "$temp/census_p25_sample", clear

*** Sample restrictions (first check correlation of head/spousal HC)
// keep if relate == 1   // disabled: restriction to household heads/men
keep if inrange(age, 36, 54)   // age restriction, both ends inclusive
keep if nchild > 0             // at least one child recorded in nchild
// keep if nchild == 1   // disabled
summarize incwage, detail

// Only relate codes 1 and 2 are kept; later sections treat 1 as the head and 2 as the wife.
keep if inlist(relate, 1, 2)

// Family income: sum incwage over the kept members of each household (serial),
// then rank households into 100 weighted quantiles.
preserve
	collapse (sum) faminc = incwage perwt, by(serial)
	// NOTE(review): perwt is summed within the household here, so a household with
	// two kept members carries the sum of both person weights in xtile -- confirm
	// this is the intended household weight.
	xtile fam_pctile = faminc [fweight = perwt], nquantiles(100)
	tempfile faminc
	save `faminc'
restore

// Attach faminc and fam_pctile to every person record; master values of perwt are kept.
merge m:1 serial using `faminc', keep(match) nogenerate
save "$temp/census_pctiles", replace


// Keep households around the 25th percentile. The range is slackened to 23-27
// so that there are at least 100 individuals in each state.
use "$temp/census_pctiles", clear
keep if inrange(fam_pctile, 23, 27)
save "$temp/census_p25", replace


**** Normalize by mean earnings
// Pull the normalizing constant: the mean of the variable named mean in wage_norm.
// preserve/restore keeps the Census sample in memory untouched.
preserve
	use "$temp/wage_norm", clear
	summarize mean
	local norm `r(mean)'
restore

**** Normalize income
rename statefip statefips
merge m:1 statefips using "$temp/skill_prices_all", keep(match) nogenerate
// Divide by the state skill price matching the person's education group,
// then by the mean-earnings constant (mean PSID earnings, per the author's note).
replace incwage = incwage / skill_price_2000      if !coll
replace incwage = incwage / skill_price_coll_2000 if coll
replace incwage = incwage / `norm'

summarize incwage if !coll & relate == 1 & sex == 1, detail
summarize incwage if coll & relate == 1 & sex == 1, detail   // looks different from PSID -- why?

*** After all this, earnings map 1-1 to parent HC (will have to move to wages down the road)
// Hours normalization: rescale earnings to 2080 hours (presumably 52 weeks x 40 hours -- confirm).
// When hours is 0 or missing, 2080/hours is missing and so is the result.
replace incwage = incwage * (2080 / hours)
rename incwage hc
summarize hc [fweight = perwt], detail

// Trim the top tail. Rows with missing hc are dropped as well; this is spelled out
// because Stata orders missing above every number, so "hc > 6" alone already matches them.
drop if hc > 6 | missing(hc)
summarize hc [fweight = perwt] if !coll & relate == 1 & sex == 1, detail
summarize hc [fweight = perwt] if coll & relate == 1 & sex == 1, detail   // now more reasonable!

summarize hc if relate == 1, detail   // for initial condition

// Correlation of spousal human capital and education
keep if inlist(relate, 1, 2)

// For each household (serial), copy the head's (relate==1) and the wife's (relate==2)
// value of coll and hc onto every row of that household.
// Creates headcoll, wifecoll, headhc, wifehc.
foreach v in coll hc {
	gen temp = `v' if relate == 1
	bysort serial: egen head`v' = max(temp)
	drop temp

	gen temp = `v' if relate == 2
	bysort serial: egen wife`v' = max(temp)
	drop temp
}

// Flag households whose wife reports zero hours.
// NOTE(review): the earlier "drop if hc>6" also removes rows with missing hc, and
// hc is missing whenever hours == 0 (2080/hours is missing). nilf therefore looks
// like it is 0 on every remaining row, leaving wife_nowork as 0 or missing -- confirm.
gen nilf = (hours == 0)
gen temp = nilf if relate == 2
bysort serial: egen wife_nowork = max(temp)
drop temp

// Spousal coll and hc: each person gets the partner's value
// (heads get the wife's, wives get the head's).
gen spouse_coll = cond(relate == 1, wifecoll, cond(relate == 2, headcoll, .))
gen spouse_hc   = cond(relate == 1, wifehc,   cond(relate == 2, headhc,   .))

// Test
correlate hc spouse_hc if !missing(spouse_hc) & relate == 1   // way less correlated than I thought; not that it matters all that much.
tabulate coll spouse_coll if relate == 1

// For now sticking with assortative mating: keep one row per head
keep if relate == 1
gen married = (marst == 1)
drop if flag_drop   // flag_drop is not created in this section; it must already be in the data
rename wife_nowork spouse_nowork

// State-level share of married household heads, weighted by perwt, saved for the
// later comparison with the Opportunity Atlas two-parent shares.
// The state identifier was renamed to statefips earlier in this script; the exact
// name is used here so the collapse does not depend on Stata's variable
// abbreviation being enabled (it fails under "set varabbrev off" otherwise).
preserve
	collapse (mean) married [fw=perwt], by(statefips)
	save "$temp/census_p25_marriage_rates", replace
restore




preserve
***** Proportion of married parents in 25th national income percentile *****
// Load the commuting-zone outcome variables, drop their standard errors, and attach
// the state abbreviation and 2000 population from onlinedata8.
use cz coll_pooled_pooled_p25* married_pooled_pooled_p* has_dad_pooled_pooled_p* has_mom_pooled_pooled_p* two_par_pooled_pooled_p* kfr_pooled_pooled_p25 kfr_stycz_pooled_pooled_p25 using "$data/Opp_Atlas/cz_outcomes", clear
drop *_se
merge 1:1 cz using "$data/LOO/onlinedata8", keep(match) nogen
keep cz stateabbrv pop2000 has* two* married* coll* kfr*

su kfr*
// pop2000 is stored as a string with separators; strip commas and spaces so it can
// be converted to a number and used as a frequency weight.
replace pop2000 = subinstr(pop2000, ", ", "", .)
replace pop2000 = subinstr(pop2000, ",", "", .)
replace pop2000 = subinstr(pop2000, " ", "", .)
replace pop2000 = trim(pop2000)
destring pop2000, replace
// Population-weighted state means of the commuting-zone outcomes.
collapse (mean) married* has* two* coll* kfr* [fw = pop2000], by(stateabbrv)

// Merge on state FIPS codes. The crosswalk is keyed on a variable called state, so
// stateabbrv is renamed explicitly (previously this relied on "state" being
// accepted as an abbreviation of stateabbrv).
rename stateabbrv state
merge m:1 state using "$data/Crosswalks/state_fips_crosswalk", keep(match) nogen

// Clean up and save
drop if statefips == 11   // FIPS 11 = District of Columbia
// NOTE(review): "coll" below is a variable abbreviation; its full name is not
// visible in this section (presumably coll_pooled_pooled_p25) -- confirm and spell out.
keep statefips two_par_pooled_pooled_p25 married_pooled_pooled_p25 coll kfr*
save "$temp/chetty_marriage_data", replace

// Export
keep statefips two_par_pooled_pooled_p25 coll kfr*
save "$temp/chetty_state_data_round2", replace

// Compare the two-parent share with the Census marriage rate
// (variable married comes from census_p25_marriage_rates).
merge 1:1 statefips using "$temp/census_p25_marriage_rates", keep(match) nogen
su two_par_pooled_pooled_p25 married
corr married two_par_pooled_pooled_p25
corr kfr_pooled_pooled_p25 kfr_stycz*
su kfr*


order statefips two_par_pooled_pooled_p25
compress
sort statefips
// NOTE(review): this path is built from $dir/Model, while the export of
// state_parent_distributions_p25.csv at the end of the script uses $model --
// confirm both globals are defined and point to the same place.
export delimited "$dir/Model/utilities/parent_marriage_probs_p25.csv", replace novarn
restore





// Probabilities of each parent type and distributions of HC, by state
keep married headcoll spouse_coll headhc spouse_hc spouse_nowork perwt statefips

// Types, by head's college education (1-4 non-college head, 5-8 college head):
//   1/5 single, 2/6 married with non-working spouse,
//   3/7 married with working HS spouse, 4/8 married with working college spouse.
// NOTE(review): Stata treats missing as true in "if spouse_nowork" / "if spouse_coll",
// so a married head with missing spouse_nowork is assigned to type 2 or 6 -- confirm
// that this is the intended treatment of heads without a spouse record.
gen count = 1
gen type = 1
replace type = 2 if !headcoll & married & spouse_nowork
replace type = 3 if !headcoll & married & !spouse_nowork & !spouse_coll
replace type = 4 if !headcoll & married & !spouse_nowork & spouse_coll
replace type = 5 if headcoll & !married
replace type = 6 if headcoll & married & spouse_nowork
replace type = 7 if headcoll & married & !spouse_nowork & !spouse_coll
replace type = 8 if headcoll & married & !spouse_nowork & spouse_coll

tab type

// Weighted cell size plus mean and SD of head and spouse HC for each state x type cell.
collapse (sum) count ///
	(mean) hc_mean = headhc spouse_hc_mean = spouse_hc ///
	(sd) hc_sd = headhc spouse_hc_sd = spouse_hc ///
	[fw=perwt], by(type statefips)

// Attach the state two-parent share. Exact variable names are used (as saved in
// chetty_marriage_data) instead of abbreviations, so this does not depend on
// "set varabbrev on".
merge m:1 statefips using "$temp/chetty_marriage_data", keep(match) nogen
drop married_pooled_pooled_p25

// frac = share of the type within its married / unmarried group in the state,
// scaled by the two-parent share for married types and by one minus that share
// for unmarried types.
gen par_married = inlist(type, 2, 3, 4, 6, 7, 8)
bys statefips par_married: egen totalcount = total(count)
replace two_par_pooled_pooled_p25 = 1 - two_par_pooled_pooled_p25 if par_married == 0
gen frac = (count/totalcount) * two_par_pooled_pooled_p25

sort statefips type

// Generate log-normal parameters (disabled).
// NOTE(review): if re-enabled, the second expression is the log-normal variance;
// the SD parameter would be its square root.
//gen hc_mean_log = log(hc_mean^2 / sqrt(hc_mean^2 + hc_sd^2))
//gen hc_sd_log = log(1 + (hc_sd^2)/(hc_mean^2))
keep statefips type frac hc_mean hc_sd spouse_hc_mean spouse_hc_sd
order statefips type frac hc_mean hc_sd spouse_hc_mean spouse_hc_sd
export delimited "$model/utilities/state_parent_distributions_p25.csv", novarn replace
